In [3]:
import numpy as np
import pandas as pd
from pandas import DataFrame
from datetime import datetime
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
import plotly.express as px
import plotly.graph_objects as go
%matplotlib inline 
In [ ]:
#Data Resource: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE,https://github.com/CSSEGISandData/COVID-19
#Orginal version: 2/15/2020
#Version updated: 2/26/2020
    #Added latest confirm map(used iso 3-letter country code)
    #Added animation on confirm by regions bar charts
    #Changed maps' visual designs, rearranged color scales
#Version updated: 3/14/2020
    #Added User Interaction Program
    #Added US COVID-19 Map 
#Version updated: 3/26/2020
    #Complied with source data changes
    #Resigned interactive program
In [10]:
confirmed_raw = pd.read_excel('time_series_covid19_confirmed_global.xlsx').fillna(0)
death_raw = pd.read_excel('time_series_covid19_deaths_global.xlsx').fillna(0)
recovered_raw = pd.read_excel('time_series_covid19_recovered_global.xlsx').fillna(0)
geodata = pd.read_csv('geocodedata.csv')
In [15]:
confirmed = pd.merge(confirmed_raw, geodata, how = 'left').dropna()
death = pd.merge(death_raw, geodata, how = 'left').dropna()
recovered = pd.merge(recovered_raw, geodata, how = 'left').dropna()
In [17]:
cols = list(confirmed.columns)
cols = cols[:4]+[cols[-1]]+cols[4:-1]
confirmed = confirmed[cols]
death = death[cols]
recovered = recovered[cols]
In [18]:
#convert column name to datetime format
confirmedtimeframe = confirmed.iloc[:,5:len(confirmed.columns)]
confirmedtimeframe.columns = pd.to_datetime(confirmedtimeframe.columns).date
deathtimeframe = death.iloc[:,5:len(death.columns)]
deathtimeframe.columns = pd.to_datetime(deathtimeframe.columns).date
recoveredtimeframe = recovered.iloc[:,5:len(recovered.columns)]
recoveredtimeframe.columns = pd.to_datetime(recoveredtimeframe.columns).date
In [19]:
confirmed = pd.concat([confirmed.iloc[:,0:5],confirmedtimeframe],axis = 1,sort=True)
death = pd.concat([death.iloc[:,0:5], deathtimeframe],axis = 1,sort=True)
recovered = pd.concat([recovered.iloc[:,0:5], recoveredtimeframe],axis = 1,sort=True)
In [21]:
#Melt indivdual timeframe per region to 'Date' column
confirmed = confirmed.melt(id_vars=["Province/State", "Country/Region",'Lat','Long','Code'], var_name="Date", value_name="Confirmed")
death = death.melt(id_vars=["Province/State", "Country/Region",'Lat','Long','Code'], var_name="Date", value_name="Death")
recovered = recovered.melt(id_vars=["Province/State", "Country/Region",'Lat','Long','Code'], var_name="Date", value_name="Recovered")
In [22]:
#Consolidate dataframe that's ready to be used
data = pd.concat([confirmed, death.iloc[:,6],recovered.iloc[:,6]], axis =1, sort = True)
In [151]:
#separate China region from and other regions worldwide
chinaregion = data.loc[(data['Country/Region'] == 'China')|(data['Country/Region'] == 'Taiwan')|
                                     (data['Country/Region'] =='Hong Kong')|(data['Country/Region'] =='Macau'),:]
ind = data.loc[(data['Country/Region'] == 'China')|(data['Country/Region'] == 'Taiwan')|
                                     (data['Country/Region'] =='Hong Kong')|(data['Country/Region'] =='Macau'),:].index
nonchinaregion = data.drop(data.index[ind],inplace = False).reset_index(drop = True)
In [82]:
#chinaregionpivot = pd.pivot_table(chinaregion_copy,index=["Country/Region","Province/State",'Date'], 
#                                 values = ['Confirmed','Recovered','Death'],
#                                 aggfunc = np.mean)
In [112]:
#Returns all countries affected as dictionaries
Countries_affected = list(data['Country/Region'].unique())
In [113]:
#Convert the list of countries to uppercase 
countries_affected_uc = list(map(lambda x: x.upper(), Countries_affected))
In [355]:
#Aggregate the number of cases per country/region
worldregion = pd.DataFrame(data.groupby(['Country/Region','Date'])['Date','Confirmed','Recovered','Death'].agg('sum')).reset_index()
worldregion_copy = worldregion.copy()
worldregion_copy['Country/Region'] = worldregion_copy['Country/Region'].str.upper() 
In [359]:
#Function to extract timeframe table per location
#region: worldregion (original)/worldregion_copy(uppercases)
#country: Country/Region in question

def getlocationdata(region, country):
    location = region.loc[region['Country/Region'] == country].reset_index()
    location['Day'] = np.arange(1,len(location)+1,1)
    return location
In [362]:
#Function to get the incremental cases each day per location
#region: worldregion (original)/worldregion_copy(uppercases)
#country: Country/Region in question

def getincremental(region, country):
    incc = []
    incd = []
    incr = []
    for i in range(len(getlocationdata_noncn(region, country))-1):
        c = getlocationdata(region, country)['Confirmed'][i+1]-getlocationdata(region, country)['Confirmed'][i]
        d = getlocationdata(region, country)['Death'][i+1]-getlocationdata(region, country)['Death'][i]
        r = getlocationdata(region, country)['Recovered'][i+1]-getlocationdata(region, country)['Recovered'][i]
        incc.append(c)
        incd.append(d)
        incr.append(r)

    dic = {'Incremental Confirmed': incc, 'Incremental Death': incd, 'Incremental Recovered': incr}

    incremental = pd.DataFrame(data = dic)
    incremental['Day'] = np.arange(1,len(incremental)+1,1)
    return incremental
In [373]:
#Function of ploting the line chart combining total confirms and incremental confirms in each location
def getconfirmeddata(confirmtable, incrementalctable, country): 
    fig = go.Figure()
    #plot the total confirms trend
    fig.add_trace(go.Scatter(x = confirmtable['Day'], y=confirmtable['Confirmed'], name='Confirmed',
                             mode='lines+markers',line=dict(color='royalblue', width=2, dash='dash')))
    #plot the incremental confirms trend
    fig.add_trace(go.Scatter(x = incrementalctable['Day'], y=incrementalctable['Incremental Confirmed'], name='Incremental Confirmed',
                             mode='lines+markers',line=dict(color='rgb(153,204,255)', width=2, dash='dash')))
    fig.update_layout(title='Confirms in ' + country,
                           xaxis_title='Days',
                           yaxis_title='Confirms')
    fig.show()
In [379]:
#Function of ploting the line chart combining total death and incremental death in each location
def getdeathdata(deathtable, incrementaldtable, country): 
    fig = go.Figure()
    #plot the total death trend
    fig.add_trace(go.Scatter(x = deathtable['Day'], y=deathtable['Death'],
                        name='Death', mode='lines+markers',line=dict(color='firebrick', width=2, dash='dot')))
    #plot the incremental death trend
    fig.add_trace(go.Scatter(x = incrementaldtable['Day'], y=incrementaldtable['Incremental Death'], name='Incremental Death',
                             mode='lines+markers',line=dict(color='rgb(255,153,153)', width=2, dash='dash')))
    fig.update_layout(title='Death in ' + country,
                       xaxis_title='Days',
                       yaxis_title='Number of Patients')
    fig.show()
In [380]:
#Function to set up an internactive program that shows the confirm and death cases in the countries you'd like to see
#Inputs are limited to countries affected (not case sensitive)
#Can cease or restart the program anytime you needed
#Feel free to hover on the lines for the exact day and number of cases. (day,# of cases)

def showgraph():
    print('This little interactive program shows you the trend of confirms and deaths in your selected country in COVID-19 pandemic from 1/22/2020.\n')
    def start_program():
        selection = input('Type in Country/Region:')
        if selection.upper() in countries_affected_uc:
            location = getlocationdata(worldregion_copy, selection.upper())
            incremental = getincremental(worldregion_copy, selection.upper())

            print('Trend of Confirmed Cases:')
            getconfirmeddata(location, incremental, selection.upper())
            print('Trend of Death Cases:')
            getdeathdata(location, incremental, selection.upper())
        else:
            print('Input not available, please try again.')
            
        restart = input("Would you like to restart this program?(y/n)")
        if restart.lower() == 'y':
            start_program()
        if restart.lower() == "n":
            print ("Ends program")
    start_program() 
            
In [381]:
showgraph()
This little interactive program shows you the trend of confirms and deaths in your selected country in COVID-19 pandemic from 1/22/2020.

Type in Country/Region:China
Trend of Confirmed Cases:
Trend of Death Cases:
Would you like to restart this program?(y/n)y
Type in Country/Region:US
Trend of Confirmed Cases:
Trend of Death Cases:
Would you like to restart this program?(y/n)y
Type in Country/Region:italy
Trend of Confirmed Cases:
Trend of Death Cases:
Would you like to restart this program?(y/n)Y
Type in Country/Region:SPAIN
Trend of Confirmed Cases:
Trend of Death Cases:
Would you like to restart this program?(y/n)Y
Type in Country/Region:GERMANY
Trend of Confirmed Cases:
Trend of Death Cases:
Would you like to restart this program?(y/n)y
Type in Country/Region:Canada
Trend of Confirmed Cases:
Trend of Death Cases:
Would you like to restart this program?(y/n)y
Type in Country/Region:fRANCE
Trend of Confirmed Cases:
Trend of Death Cases:
Would you like to restart this program?(y/n)y
Type in Country/Region:United Kingdom
Trend of Confirmed Cases:
Trend of Death Cases:
Would you like to restart this program?(y/n)y
Type in Country/Region:iran
Trend of Confirmed Cases:
Trend of Death Cases:
Would you like to restart this program?(y/n)n
Ends program
In [257]:
chinaregion['Date'] = pd.to_datetime(chinaregion['Date'])
chinaregion['Date'] = chinaregion['Date'].dt.strftime('%m.%d')
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [139]:
#Get the latest cases in China region
chinalatest = pd.DataFrame(chinaregion.groupby(['Province/State','Country/Region'])['Lat','Long','Code','Confirmed','Recovered','Death'].agg('max')).sort_values(by=['Confirmed'], ascending = False).reset_index()
In [158]:
#Plot latest confirms in China region
fig = px.bar(chinalatest, x='Province/State', y='Confirmed',color = 'Province/State',color_discrete_sequence= px.colors.qualitative.Set3,
             hover_data=['Confirmed'], title = 'Latest Confirmed in China Region')
fig.show()
In [382]:
#Plot latest confirms in China region except Hubei Province
fig = px.bar(chinalatest.loc[chinalatest['Province/State'] != 'Hubei',:], x='Province/State', y='Confirmed',color = 'Province/State',color_discrete_sequence= px.colors.qualitative.Set3,
             hover_data=['Confirmed'], title = 'Latest Confirmed in China Region (other than Hubei)')
fig.show()
In [258]:
#Shows the changes of confirmed cases since 1/22/20 in China region
#Click on 'Start' botton to start the animation
fig = px.bar(chinaregion, x='Province/State', y='Confirmed',color = 'Province/State',color_discrete_sequence= px.colors.qualitative.Set3,
             animation_frame="Date",animation_group="Province/State",hover_data=['Confirmed'], title = 'Confirmed in China Region')
fig.show()
In [259]:
#Shows the changes of confirmed cases since 1/22/20 in China region other than Hubei Province
#Click on 'Start' botton to start the animation
fig = px.bar(chinaregion.loc[chinaregion['Province/State'] != 'Hubei',:], x='Province/State', y='Confirmed',color = 'Province/State',
             color_discrete_sequence= px.colors.qualitative.Set3, animation_frame="Date",animation_group="Province/State",
             hover_data=['Confirmed'], title = 'Confirmed in China Region Excluding Hubei')
fig.show()
In [161]:
worldregion = pd.DataFrame(data.groupby(['Country/Region','Date'])['Date','Confirmed','Recovered','Death'].agg('sum')).reset_index()
worldlatest = pd.DataFrame(data.groupby(['Country/Region'])['Lat','Long','Code','Confirmed','Recovered','Death'].agg('max')).sort_values(by = 'Confirmed', ascending = False).reset_index()
In [252]:
worldregion_copy = worldregion.loc[worldregion['Confirmed'] >= 0,:]
In [253]:
worldregion_copy = worldregion_copy.sort_values(by = ['Date', 'Confirmed'], ascending = True)
In [254]:
worldregion_copy['Date'] = pd.to_datetime(worldregion_copy['Date'])
worldregion_copy['Date'] = worldregion_copy['Date'].dt.strftime('%m.%d')
In [163]:
#Plot the latest worldwide confirms 
fig = px.bar(worldlatest.loc[worldlatest['Confirmed'] >=500,:], x='Country/Region', y='Confirmed',color = 'Country/Region',color_discrete_sequence= px.colors.qualitative.Set3,
             hover_data=['Confirmed'], title = 'Latest Confirmed Worldwide')
fig.show()
In [256]:
#Shows the changes of confirmed cases worldwide since 1/22/20
#Click on 'Start' botton to start the animation
#More information will show when you hover on the bars

fig = px.bar(worldregion_copy, x='Country/Region', y='Confirmed',color = 'Country/Region',
             color_discrete_sequence= px.colors.qualitative.Set3,animation_frame="Date",animation_group="Country/Region",
             hover_data=['Confirmed'], title = 'Worldwide Confirms')

fig.show()
In [260]:
chinalatest['Death Rate'] = chinalatest['Death']/chinalatest['Confirmed']
chinalatest['Recover Rate'] = chinalatest['Recovered']/chinalatest['Confirmed']
worldlatest['Death Rate'] = worldlatest['Death']/worldlatest['Confirmed']
worldlatest['Recover Rate'] = worldlatest['Recovered']/worldlatest['Confirmed']
In [149]:
#Death v.s. Confirmed in China region except Hubei Province
#Larger the size, higher the death rate

fig = px.scatter(chinalatest.loc[chinalatest['Province/State'] != 'Hubei',:], x="Confirmed", y="Death", size = 'Death Rate',
                 color="Province/State",color_discrete_sequence= px.colors.qualitative.Plotly,size_max=60,
                 title = 'Deaths in Confirmed in Other Part of China')
fig.show()
In [261]:
#Death v.s. Confirmed worldwide
#Larger the size, higher the death rate

fig = px.scatter(worldlatest, x="Confirmed", y="Death", size = 'Death Rate',
                 color="Country/Region",color_discrete_sequence= px.colors.qualitative.Plotly,size_max=60,
                 title = 'Deaths in Confirmed Worldwide')
fig.show()
In [351]:
#Spreading in all China regions since 1/22/2020
#Click on the 'Start' botton the start the animation

fig = px.scatter_geo(chinaregion, lat ="Lat", lon = 'Long',
                    color="Confirmed",size = 'Confirmed', animation_frame="Date",
                    hover_name="Province/State", size_max=20,
                    color_continuous_scale=[[0, 'rgb(255,160,122)'],
                             [0.01,"rgb(255,99,71)"], 
                             [0.02,"rgb(220,20,60)"],
                             [0.2,"rgb(178,34,34)"],
                             [0.6,"rgb(165,42,42)"],
                             [0.8,"rgb(139,0,0)"],
                             [1.0,"rgb(128,0,0)"]])
fig.update_geos(
    showcoastlines=True, coastlinecolor="rgb(153, 76, 0)",
    showland=True, landcolor="rgb(255, 204, 153)",
    showocean=True, oceancolor="White",
)
fig.update_layout(title='Confirmed in China thru Timeline')
fig.show()
In [340]:
worldregion = pd.DataFrame(data.groupby(['Country/Region','Date'])['Date','Lat','Long','Confirmed'].agg('sum')).reset_index()
worldregion['Date'] = pd.to_datetime(worldregion['Date'])
worldregion['Date'] = worldregion['Date'].dt.strftime('%m.%d')
In [347]:
worldregion.loc[worldregion['Country/Region'] == 'China','Lat'] = 30.9756
worldregion.loc[worldregion['Country/Region'] == 'China','Long'] = 112.2707
worldregion.loc[worldregion['Country/Region'] == 'United Kingdom','Lat'] = 55.3781
worldregion.loc[worldregion['Country/Region'] == 'United Kingdom','Long'] = -3.436
worldregion.loc[worldregion['Country/Region'] == 'Netherlands','Lat'] = 12.5186
worldregion.loc[worldregion['Country/Region'] == 'Netherlands','Long'] = -70.0358
worldregion.loc[worldregion['Country/Region'] == 'France','Lat'] = 46.2276
worldregion.loc[worldregion['Country/Region'] == 'France','Long'] = 2.2137
worldregion.loc[worldregion['Country/Region'] == 'Canada','Lat'] =49.2827
worldregion.loc[worldregion['Country/Region'] == 'Canada','Long'] = -123.1207
worldregion.loc[worldregion['Country/Region'] == 'Australia','Lat'] =-28.0167
worldregion.loc[worldregion['Country/Region'] == 'Australia','Long'] = 153.4
In [350]:
#Worldwide spreading since 1/22/2020
#Click on the 'Start' botton the start the animation

fig = px.scatter_geo(worldregion, 
                     lat ="Lat", lon = 'Long',
                    color="Confirmed",size = 'Confirmed', animation_frame="Date",
                    hover_name="Country/Region", size_max=30,
                    color_continuous_scale=[[0, 'rgb(255,160,122)'],
                             [0.01,"rgb(255,99,71)"], 
                             [0.02,"rgb(220,20,60)"],
                             [0.2,"rgb(178,34,34)"],
                             [0.6,"rgb(165,42,42)"],
                             [0.8,"rgb(139,0,0)"],
                             [1.0,"rgb(128,0,0)"]],)
fig.update_geos(
    showcoastlines=True, coastlinecolor="rgb(153, 76, 0)",
    showland=True, landcolor="rgb(255, 204, 153)",
    showocean=True, oceancolor="White",
)
fig.update_layout(title='Confirmed Worldwide thru Timeline')
fig.show()
In [352]:
latestdata = pd.concat([chinalatest,nonchinalatest],axis = 0,sort=False)
latestbycode = pd.DataFrame(latestdata.groupby(['Code','Country/Region'])['Country/Region','Code','Confirmed','Recovered','Death'].agg('sum')).sort_values(by=['Confirmed'], ascending = False).reset_index()
In [353]:
#Latest worldwide confirms (Based on 3-letter country codes)

fig = go.Figure(data=go.Choropleth(
                locations = latestbycode['Code'],
                z = latestbycode['Confirmed'],
                text = latestbycode['Country/Region'],
                colorscale = [[0, 'rgb(255,222,173)'],
                             [0.001,"rgb(255,99,71)"], 
                             [0.005,"rgb(165,42,42)"],
                             [0.1,"rgb(178,34,34)"],
                             [1.0,"rgb(128,0,0)"]],
                autocolorscale=False,
                reversescale=False,
                marker_line_color='darkgray',
                marker_line_width=0.5,
                colorbar_title = 'Confirmed',
))

fig.update_layout(title='Latest Worldwide Confirmed')

fig.show()